<!DOCTYPE html>
<html lang="zh-CN">

<head>
    <meta charset="utf-8">
    <!-- The document language is declared with lang="zh-CN" on <html>; that
         supersedes the non-conforming http-equiv="content-language" pragma. -->

    <!-- NOTE(review): initial-scale=0.5 shows the page zoomed out on mobile.
         Kept unchanged because the site CSS may have been tuned for it;
         confirm whether initial-scale=1 is what is wanted. -->
    <meta name="viewport" content="width=device-width, initial-scale=0.5">
    

    
    <title>Rweka包解读</title>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.8/clipboard.min.js"></script>
    
    
    
    
    <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@3.3.7/dist/css/bootstrap.min.css">

    
    <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@3.3.7/dist/css/bootstrap-theme.min.css">

    <link rel="stylesheet" href="/css/stylesheet.css">
    <link rel="stylesheet" href="/css/home.css">

    
    
        <!-- Page-wide background colour. -->
        <style>
            body {
                background-color: #fbf6ec;
            }
        </style>
    
    
                
        
        
            <link rel="stylesheet" href="/css/main.css"/>
        




        
        
        
        <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/10.3.2/styles/github.min.css"  />
         
        
        <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/10.3.2/highlight.min.js"></script>
        
        
        <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/10.3.2/languages/r.min.js"></script>
        
        <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/10.3.2/languages/yaml.min.js"></script>
        
        <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/10.3.2/languages/latex.min.js"></script>
        
        <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/10.3.2/languages/matlab.min.js"></script>
        
        <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/10.3.2/languages/mathematica.min.js"></script>
        
        <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/10.3.2/languages/julia.min.js"></script>
        
        <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/10.3.2/languages/julia-repl.min.js"></script>
        
        <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/10.3.2/languages/powershell.min.js"></script>
        
        <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/10.3.2/languages/bash.min.js"></script>
        
        <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/10.3.2/languages/shell.min.js"></script>
        
        <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/10.3.2/languages/python.min.js"></script>
        
        <script>hljs.initHighlightingOnLoad();</script>
     <script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
          
     <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.1/css/all.min.css" integrity="sha512-+4zCK9k+qNFUR5X+cKL9EIR+ZOhtIloNl9GIKS57V1MyNsYpYcUrUeQc9vNfzsWfV28IaLL3i96P9sdNyeRssA==" crossorigin="anonymous" />
     
     
</head>


<body>
    <script>
        // Tags <body> with the "mobile" class when the user-agent string
        // mentions "mobile" or "android"; re-checked (throttled) on resize.

        // Id of the pending throttle timer; falsy when no run is scheduled.
        var resizeTimeout;

        // Adds or removes the "mobile" class on <body> based on the UA string.
        function actualResizeHandler() {
            var userAgent = navigator.userAgent;
            var looksMobile = /mobile/i.test(userAgent) || /android/i.test(userAgent);
            if (looksMobile) {
                document.body.classList.add('mobile');
            } else {
                document.body.classList.remove('mobile');
            }
        }

        // Resize listener: while a run is pending, further events are ignored,
        // so the handler fires at most once per 66 ms timer.
        function resizeThrottler() {
            if (resizeTimeout) {
                return;
            }
            resizeTimeout = setTimeout(function () {
                resizeTimeout = null;
                actualResizeHandler();
            }, 66);
        }

        window.addEventListener("resize", resizeThrottler, false);

        // Apply the class once right away, without waiting for a resize.
        actualResizeHandler();
    </script>

    
      
      
            <nav class="navbar navbar-default navbar-static-top" style="opacity: .9">
        <div class="container-fluid">
            <!-- Brand link and the collapse toggle shown on narrow screens. -->
            <div class="navbar-header">
                <button type="button" class="navbar-toggle" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1">
                    <span class="sr-only">Toggle navigation</span>
                    <span class="big-icon icon-bar"></span>
                    <span class="big-icon icon-bar"></span>
                    <span class="big-icon icon-bar"></span>
                </button>
                <a class="navbar-brand" href="/">zsc</a>
            </div>

            <!-- Top-level site sections. -->
            <div class="navbar-collapse collapse" id="bs-example-navbar-collapse-1" style="height: auto;">
                <ul class="nav navbar-nav navbar-right" style="font-size: 100%">
                    <li><a href="/about/">About</a></li>
                    <li><a href="/categories/">Categories</a></li>
                    <li><a href="/">Home</a></li>
                    <li><a href="/tags/">Tags</a></li>
                    <li><a href="/issue/">存在的问题</a></li>
                </ul>
            </div>
        </div>
    </nav>










<div class="inner">
    



    <div class="blog-post">
        
                <div>
            <!-- Post title, author, date and tags.
                 NOTE(review): <div>/<ul> nested inside <h2>, and <span> as a direct
                 child of <ul>, are not valid content models. The structure is left
                 as it was because the site CSS (id "singe-h2", class "tags") may
                 depend on it; confirm before restructuring. -->
            <h2 align="center" id="singe-h2">
                Rweka包解读
                <!-- datetime supplies the machine-readable date, since the element
                     also wraps the author name. -->
                <time datetime="2015-11-16">
                    <br>
                    <span>
                        <i class="fa fa-user-edit" style="color:#888;font-size: 80%;" aria-hidden="true"></i>
                        zsc
                    </span>
                    &nbsp;
                    <span>
                        <i class="fa fa-calendar-alt" style="color:#888;font-size: 80%;" aria-hidden="true"></i>
                        2015-11-16
                    </span>
                </time>

                <div>
                    <ul class="tags">
                        <span>标签:</span>
                        <li><a class="link" href="/tags/r"> #r </a></li>
                        <span> </span>
                    </ul>
                </div>
            </h2>
        </div>
    
        
        <section id="content">
            <h2 id="1-数据输入和输出">1. 数据输入和输出</h2>
<div class="highlight"><pre tabindex="0" style="color:#f8f8f2;background-color:#272822;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-R" data-lang="R"><span style="display:flex;"><span><span style="color:#a6e22e">WOW</span>()：查看Weka函数的参数。
</span></span><span style="display:flex;"><span><span style="color:#a6e22e">Weka_control</span>()：设置Weka函数的参数。
</span></span><span style="display:flex;"><span><span style="color:#a6e22e">read.arff</span>()：读取ARFF格式的数据。一般的数据需要先用write.arff()写出、再用read.arff()读入，才能转换成ARFF格式的数据。
</span></span><span style="display:flex;"><span><span style="color:#a6e22e">write.arff</span>()：将数据写入Weka Attribute<span style="color:#f92672">-</span>Relation File <span style="color:#a6e22e">Format </span>(ARFF)格式的文件。
</span></span></code></pre></div><h2 id="2-数据预处理">2. 数据预处理</h2>
<ul>
<li>
<p><code>Normalize()</code>：无监督的标准化连续性数据,即min-max标准化。对于字符、逻辑、因子变量跳过，只对连续数据标准化</p>
</li>
<li>
<p><code>Discretize()</code>：用MDL(最小描述长度)方法，有监督的离散化连续性数值数据。感觉用处不大，把连续的数据全离散成“all”字符</p>
</li>
</ul>
<h2 id="3-分类和回归">3. 分类和回归</h2>
<ul>
<li>
<p><code>IBk()</code>：k最近邻分类，用法和LBR一样</p>
</li>
<li>
<p><code>LBR()</code>：naive Bayes法分类</p>
</li>
</ul>
<div class="highlight"><pre tabindex="0" style="color:#f8f8f2;background-color:#272822;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-R" data-lang="R"><span style="display:flex;"><span>   RWeka包中的IBk函数实现knn算法
</span></span><span style="display:flex;"><span>        <span style="color:#f92672">*</span> model<span style="color:#f92672">=</span><span style="color:#a6e22e">IBk</span>(formula, data, subset, na.action,control <span style="color:#f92672">=</span> <span style="color:#a6e22e">Weka_control</span>(), options <span style="color:#f92672">=</span> <span style="color:#66d9ef">NULL</span>)
</span></span><span style="display:flex;"><span>                <span style="color:#f92672">+</span> formula<span style="color:#f92672">:</span>公式，分类变量<span style="color:#f92672">~</span>特征，和回归一样
</span></span><span style="display:flex;"><span>                <span style="color:#f92672">+</span> data <span style="color:#f92672">:</span> 训练数据集,必须为<span style="color:#f92672">*</span>.arff格式的数据,对于R中的数据，
</span></span><span style="display:flex;"><span>                      <span style="color:#f92672">+</span> 可以先把某个对象用<span style="color:#a6e22e">write.arff</span>(iris,<span style="color:#e6db74">&#39;iris.arff&#39;</span>)写成arff文件格式
</span></span><span style="display:flex;"><span>                      <span style="color:#f92672">+</span> 再次用<span style="color:#a6e22e">read.arff</span>(<span style="color:#e6db74">&#34;iris.arff&#34;</span>)读入R内存中即可
</span></span><span style="display:flex;"><span>                <span style="color:#f92672">+</span> control<span style="color:#f92672">:</span>参数控制 control<span style="color:#f92672">=</span><span style="color:#a6e22e">Weka_control</span>(K<span style="color:#f92672">=</span> <span style="color:#ae81ff">22</span>,X <span style="color:#f92672">=</span> <span style="color:#66d9ef">TRUE</span>) 
</span></span><span style="display:flex;"><span>                      <span style="color:#f92672">+</span> X K都为大写，表示自动选取<span style="color:#ae81ff">1</span>：K<span style="color:#f92672">=</span><span style="color:#ae81ff">22</span>里面最适合的K近邻分类 
</span></span><span style="display:flex;"><span>             
</span></span><span style="display:flex;"><span>        <span style="color:#f92672">*</span> <span style="color:#a6e22e">evaluate_Weka_classifier</span>(object, newdata <span style="color:#f92672">=</span> <span style="color:#66d9ef">NULL</span>, cost <span style="color:#f92672">=</span> <span style="color:#66d9ef">NULL</span>, 
</span></span><span style="display:flex;"><span>                                   numFolds <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span>, complexity <span style="color:#f92672">=</span> <span style="color:#66d9ef">FALSE</span>,
</span></span><span style="display:flex;"><span>                                   class <span style="color:#f92672">=</span> <span style="color:#66d9ef">FALSE</span>, seed <span style="color:#f92672">=</span> <span style="color:#66d9ef">NULL</span>, <span style="color:#66d9ef">...</span>)
</span></span><span style="display:flex;"><span>                <span style="color:#f92672">+</span> object<span style="color:#f92672">:</span> 一个Weka_classifier对象,Rweka建立的分类对象模型
</span></span><span style="display:flex;"><span>                <span style="color:#f92672">+</span> newdata<span style="color:#f92672">:</span> 测试数据，若省略或为<span style="color:#66d9ef">NULL</span>，则使用训练数据
</span></span><span style="display:flex;"><span>                <span style="color:#f92672">+</span> numFolds： k<span style="color:#f92672">-</span>交叉验证
</span></span></code></pre></div><ul>
<li>
<p>J48()：C4.5决策树算法（决策树在分析各个属性时，是完全独立的）。</p>
</li>
<li>
<p>LMT()：组合树结构和Logistic回归模型，每个叶子节点是一个Logistic回归模型，准确性比单独的决策树和Logistic回归方法要好。</p>
</li>
<li>
<p>M5P()：M5 模型树算法，组合了树结构和线性回归模型，每个叶子节点是一个线性回归模型，因而可用于连续数据的回归。</p>
</li>
<li>
<p>DecisionStump()：单层决策树算法，常被作为boosting的基本学习器。</p>
</li>
<li>
<p>SMO()：支持向量机分类</p>
</li>
<li>
<p>AdaBoostM1()：Adaboost M1方法。-W参数指定弱学习器的算法。</p>
</li>
<li>
<p>Bagging()：通过从原始数据取样(用替换方法)，创建多个模型。</p>
</li>
<li>
<p>LogitBoost()：弱学习器采用了对数回归方法,学习到的是实数值</p>
</li>
<li>
<p>MultiBoostAB()：AdaBoost 方法的改进，可看作AdaBoost 和 “wagging”的组合。</p>
</li>
<li>
<p>Stacking()：用于不同的基本分类器集成的算法。</p>
</li>
<li>
<p>LinearRegression()：建立合适的线性回归模型。</p>
</li>
<li>
<p>Logistic()：建立logistic回归模型。</p>
</li>
<li>
<p>JRip()：一种规则学习方法。</p>
</li>
<li>
<p>M5Rules()：用M5方法产生回归问题的决策规则。</p>
</li>
<li>
<p>OneR()：简单的1-R分类法。</p>
</li>
<li>
<p>PART()：产生PART决策规则。</p>
</li>
</ul>
<h2 id="4-聚类">4. 聚类</h2>
<ul>
<li>Cobweb()：这是种基于模型方法，它假设每个聚类的模型并发现适合相应模型的数据。不适合对大数据库进行聚类处理。</li>
<li>FarthestFirst()：快速的近似的k均值聚类算法</li>
<li>SimpleKMeans()：k均值聚类算法</li>
<li>XMeans()：改进的k均值法，能自动决定类别数</li>
<li>DBScan()：基于密度的聚类方法，它根据对象周围的密度不断增长聚类。它能从含有噪声的空间数据库中发现任意形状的聚类。此方法将一个聚类定义为一组“密度连接”的点集。</li>
</ul>
<h2 id="5关联规则">5. 关联规则</h2>
<ul>
<li>Apriori()：Apriori 是关联规则领域里最具影响力的基础算法，是一种广度优先算法，通过多次扫描数据库来获取支持度大于最小支持度的频繁项集。它的理论基础是频繁项集的两个单调性原则：频繁项集的任一子集一定是频繁的；非频繁项集的任一超集一定是非频繁的。在海量数据的情况下，Apriori 算法的时间和空间成本非常高。</li>
<li>Tertius()：Tertius算法。</li>
</ul>
<h2 id="6预测和评估">6. 预测和评估</h2>
<ul>
<li>predict()：根据分类或聚类结果预测新数据的类别</li>
<li>table()：比较两个因子对象</li>
<li>evaluate_Weka_classifier()：评估模型的执行，如：TP Rate，FP Rate，Precision，Recall，F-Measure。</li>
</ul>

        </section>
    </div>
    <br>
    
    




<!-- Page-view counter placeholder: the id carries this post's path and
     data-flag-title carries its title. Presumably the count is filled in by
     Valine, which is initialised with "visitor: true" near the end of this
     file; verify against the Valine documentation. -->
<span id="/md/2015-11-16-rweka/" class="leancloud_visitors" data-flag-title="Rweka包解读">
  <span class="post-meta-item-text">文章总阅读量 </span>
  <span class="leancloud-visitors-count"><i class="leancloud-visitors-count"></i></span>次;
  <p></p>
</span>



    

    
    
    <!-- Button that opens this post's source for editing; its click handler is
         attached by id ("edit-button") in a script further down the page.
         It carries no aria-haspopup / aria-expanded / aria-controls: it opens a
         new window rather than an in-page popup, and no element with id "edit"
         exists for aria-controls to point at. -->
    <button id="edit-button" class="icon-button" type="button" title="Fork and edit" aria-label="Fork and edit">
        <i class="fa fa-edit">编辑本文</i>
    </button>
    
    
    

    <br>
    <hr>
    <!-- Previous / next post links. <li> is only valid inside a list, so the
         items are wrapped in a <ul> whose default margin, padding and bullets
         are reset inline to keep the floated layout unchanged.
         NOTE(review): confirm no site CSS rule targets these <li>/<ul> elements. -->
    <ul style="list-style:none;margin:0;padding:0">
        <li style="float:left;list-style:none">
        </li>
        <li style="float:right;list-style:none">
            <a class="next" href="/md/2017-01-30-r%E8%AF%AD%E8%A8%80%E4%B8%8E%E5%8F%AF%E8%A7%86%E5%8C%96/"> 下一篇: R语言与可视化</a>
        </li>
    </ul>
     
    
    <script src="/js/copyCode.js"></script>
    <script src="/js/tooltips.js"></script>
    
   
    <script>
    // Wrap every <table> in its own <div class="table-area"> exactly once.
    // Each table is wrapped with plain DOM calls only; there is no additional
    // jQuery $("table").wrap(...) pass inside the loop, which would re-wrap
    // every table on every iteration and nest the wrappers.
    Array.prototype.forEach.call(document.querySelectorAll('table'), function (table) {
        var wrapper = document.createElement('div');
        wrapper.className = 'table-area';
        table.parentNode.insertBefore(wrapper, table);
        // appendChild moves the node, so no explicit removeChild is needed.
        wrapper.appendChild(table);
    });
    </script>

    
<br>
<hr>


<!-- Global site tag (gtag.js) - Google Analytics -->
<script async src="https://www.googletagmanager.com/gtag/js?id=UA-111691389-1"></script>
<script>
  // Create the shared dataLayer queue unless one already exists on window.
  window.dataLayer = window.dataLayer || [];
  // Queues each call by pushing its `arguments` object onto dataLayer.
  function gtag() { dataLayer.push(arguments); }
  gtag('js', new Date());

  // NOTE(review): "UA-" ids are Universal Analytics properties, which Google has retired; confirm this property still collects data.
  gtag('config', 'UA-111691389-1');
</script>




      
      
      

       
      
      
      <script>
              // Opens this post's Markdown source in GitHub's web editor when the
              // edit button is clicked.
              (function () {
                  var editButton = document.getElementById("edit-button");
                  // Nothing to bind if the button is not present on this page.
                  if (!editButton) {
                      return;
                  }
                  editButton.addEventListener("click", function () {
                      // "noopener" stops the new tab from reaching this page
                      // through window.opener; the return value is not needed.
                      window.open("https://github.com/zoushucai/blogmmm/edit/master/content/md/2015-11-16-Rweka.md", "_blank", "noopener");
                  });
              })();
      </script>
      
          




<script>
  // Sets an iframe element's CSS height to the scroll height of the document
  // body inside it, in pixels.
  //   obj: the iframe element to resize (read via obj.contentWindow).
  function resizeIframe(obj) {
    var innerBody = obj.contentWindow.document.body;
    var heightPx = innerBody.scrollHeight + 'px';
    obj.style.height = heightPx;
  }
</script>



    <!-- Table-of-contents expand/collapse helpers. -->
    <script type="text/javascript">
    // Expands the table-of-contents panel: shows its body, relabels the toggle
    // link, sizes and positions the panel, and points the link at hidediv().
    function showdiv() {
        var tocBody = document.getElementById("divtocTableOfContents");
        tocBody.style.display = "block";

        var toggleLink = document.getElementById("strHref");
        toggleLink.innerHTML = "目录收起-";

        var panelStyle = document.getElementById('divTableOfContents').style;
        panelStyle.width = "22%";
        panelStyle.height = "55%";
        panelStyle.top = "25%";
        panelStyle.bottom = "5%";

        toggleLink.href = "javascript:hidediv()";
    }
    // Collapses the table-of-contents panel: hides its body, relabels the
    // toggle link, points the link at showdiv(), and shrinks the panel.
    function hidediv() {
        var tocBody = document.getElementById("divtocTableOfContents");
        tocBody.style.display = "none";

        var toggleLink = document.getElementById("strHref");
        toggleLink.innerHTML = "目录展开+";
        toggleLink.href = "javascript:showdiv()";

        var panelStyle = document.getElementById('divTableOfContents').style;
        panelStyle.width = "10%";
        panelStyle.height = "5%";
    }
    </script>
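</div>
<!-- The </body> and </html> end tags are omitted here (both are optional in HTML) because page content continues below. -->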







    <script defer src="https://cdn.jsdelivr.net/npm/katex@0.12.0/dist/contrib/mathtex-script-type.min.js" integrity="sha384-LJ2FmexL77rmGm6SIpxq7y+XA6bkLzGZEgCywzKOZG/ws4va9fUVu2neMjvc3zdv" crossorigin="anonymous"></script>

    <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.12.0/dist/katex.min.css">
    <script defer src="https://cdn.jsdelivr.net/npm/katex@0.12.0/dist/katex.min.js"></script>
    <script defer src="https://cdn.jsdelivr.net/npm/katex@0.12.0/dist/contrib/auto-render.min.js"></script>
    <script>
        // Once the DOM has been parsed, run KaTeX auto-render over <body>.
        document.addEventListener("DOMContentLoaded", function () {
            // Delimiter pairs to scan for, in the same order as before.
            var mathDelimiters = [
                {left: "$$", right: "$$", display: true},
                {left: "$", right: "$", display: false},
                {left: "\\(", right: "\\)", display: false},
                {left: "\\[", right: "\\]", display: true}
            ];
            renderMathInElement(document.body, {delimiters: mathDelimiters});
        });
    </script>













<br>
<div class="inner">
              
            
          
          
  
          
  
  <div id="vcomments"></div>
  
  <!-- LeanCloud SDK and Valine, loaded over explicit https instead of
       protocol-relative URLs. -->
  <script src="https://cdn1.lncld.net/static/js/3.0.4/av-min.js"></script>

  <script src="https://unpkg.com/valine/dist/Valine.min.js"></script>
  <script type="text/javascript">
    // Mount the Valine comment widget on the #vcomments container.
    // NOTE(review): appId/appKey are shipped to every visitor; confirm the
    // LeanCloud application restricts them (e.g. allowed domains / ACLs).
    new Valine({
        el: '#vcomments',
        appId: 'HfHPKPkLa0cBEDPcdBAHuqMv-gzGzoHsz',
        appKey: 'r5RJAasN8e0mB9sq6y9pEcX0',
        lang: 'zh-CN',
        notify: false,
        verify: false,
        avatar: 'identicon',
        placeholder: '说点什么吧...',
        visitor: true
    });
  </script>

</div>

<br>
<br>
<!-- Site footer: copyright line linking to github.com/zoushucai. -->
<footer>
  <p style="float:right;margin-right: 5%;margin-top: 0%;">
    &copy; 2022 <a href="https://github.com/zoushucai">zsc</a>
  </p>
</footer>
<br>
<br>


